********************************************************************************************
********* COMBINING THIESSEN POLYGON DATASETS TO INCLUDE AN EXTRA MEASURE OF DEFORESTATION ***********
********* ALSO GENERATING SOME VARIABLES *************************************************************
*** DATE: 8/10/2022
*** THIS VERSION: STATA 16 ********

import delimited "C:\Users\alixgarj\Desktop\ecoregions.csv", clear 

keep if eco_id ==  50201 | eco_id ==   50302  | eco_id == 50303  | eco_id ==  50526 | eco_id ==   50701 | eco_id ==  51201  | eco_id ==  51301  | eco_id ==  51302   | eco_id == 51303  | eco_id ==  51306 | eco_id ==  51307 | eco_id ==   51310  | eco_id ==  51311   | eco_id == 51312  | eco_id ==  60112  | eco_id == 60113  | eco_id ==  60114  | eco_id ==  60146  | eco_id ==  60148  | eco_id ==   60154 | eco_id ==   60161 | eco_id ==   60162  | eco_id ==  60176 | eco_id ==  60177  | eco_id ==   60181 | eco_id ==  60204  | eco_id ==   60205 | eco_id ==   60209  | eco_id ==   60211 | eco_id ==  60217  | eco_id ==  60227   | eco_id ==  60228  | eco_id ==  60230  | eco_id ==   60233  | eco_id ==   60235 | eco_id ==    60303  | eco_id ==  60307  | eco_id ==   60308  | eco_id ==   60309  | eco_id ==   60310 | eco_id ==   61314  | eco_id ==   61316  | eco_id ==   61403  | eco_id ==  61404  | eco_id ==   61407

bysort eco_id: keep if _n == 1
sort biome
 
keep biome eco_id eco_name

save "${data}biomeidentifiers.dta", replace


****************** READING IN LANDSAT DATA ********************
import delimited "${data}TinyPolysShapes\Landsat\path_row_counts_2000-2015_L5.csv", clear
gen path = substr(path_row,1,2)
destring(path), replace
gen row = substr(path_row,-2,.)
destring(row), replace
rename num_scenes num_scenesL5 
keep if year == 2010 | year == 2011
rename year era
sort path row
gen path_rown = path*1000+row
reshape wide num_scenesL5, i(path_rown) j(era)
la var num_scenesL52010 "Number of cloud-free scenes L5, 2010"
la var num_scenesL52011 "Number of cloud-free scenes L5, 2011"

save "${data}TinyPolysShapes\Landsat\path_row_counts_2000-2015_L5.dta", replace

import delimited "${data}TinyPolysShapes\Landsat\path_row_counts_2000-2015_L7.csv", clear
gen path = substr(path_row,1,2)
destring(path), replace
gen row = substr(path_row,-2,.)
destring(row), replace
rename num_scenes num_scenesL7 
keep if year == 2010 | year == 2011

rename year era
sort path row
gen path_rown = path*1000+row
reshape wide num_scenesL7, i(path_rown) j(era)
la var num_scenesL72010 "Number of cloud-free scenes L7, 2010"
la var num_scenesL72011 "Number of cloud-free scenes L7, 2011" 

save "${data}TinyPolysShapes\Landsat\path_row_counts_2000-2015_L7.dta", replace

import delimited "${data}Grid5kmLandsat.csv", clear
keep pagenumber path row shape_area
rename pagenumber grid_id
bysort grid_id: egen maxarea = max(shape_area)
keep if shape_area == maxarea
bysort grid_id: keep if _n == 1
sort path row
merge m:m path row using "${data}TinyPolysShapes\Landsat\path_row_counts_2000-2015_L5.dta"
drop _merge 
sort path row
merge m:m path row using "${data}TinyPolysShapes\Landsat\path_row_counts_2000-2015_L7.dta"
drop if _merge == 1 | _merge == 2
drop _merge
save "${data}Grid5kmLandsat.dta", replace
use "${data}defor_frontier_panel.dta", clear
replace era = 2011 if era == 2013
merge m:m grid_id using "${data}Grid5kmLandsat.dta"

keep if era == 2010 | era == 2011
xtset grid_id era

drop _merge path_rown t1 t2 t3 t3 t4 t5 t6 d1 d2 d3 e1 e2 e3 b1 d1_t3 d2_t3 d3_t3 e1_t3 e2_t3 ///
e3_t3 b2_t3 b3_t3 b4_t3 b8_t3 elev_stdt3 ln_hwy60t3 d1_t4 d2_t4 d3_t4 e1_t4 e2_t4 e3_t4 b2_t4 b3_t4 ///
b4_t4 b8_t4 elev_stdt4 ln_hwy60t4 d1_t5 d2_t5 d3_t5 e1_t5 e2_t5 e3_t5 b2_t5 b3_t5 b4_t5 b8_t5 elev_stdt5 ///
vegnf time total_veg ln_hwy60t5
reshape wide forest hansen , i(grid_id) j(era)
drop hansen2011 forest2010
la var hansen2010 "Proportion forest Hansen data, 2010"
la var forest2011 "Proportion forest Serie V"
replace num_scenesL52010 = 0 if num_scenesL52010 == .
replace num_scenesL72010 = 0 if num_scenesL72010 == .
replace num_scenesL52011 = 0 if num_scenesL52011 == .
replace num_scenesL72011 = 0 if num_scenesL72011 == .


gen govmhansen= abs(forest2011-hansen2010)
gen forestbin = forest2011 > 0.02
*gen hansenbin = hansen2010 > 0.004
gen hansenbin = hansen2010 > 0.02

gen govmhansenb = abs(forestbin - hansenbin)
replace elev_mean = elev_mean / 1000
la var elev_mean "Mean elevation, 1000s m"
la var forest2011 "Proportion forest, 2011, GOM"
la var hansen2010 "Proportion forest, 2010, Hansen"
la var forestbin "Binary forest, 2011, GOM"
la var hansenbin "Binary forest, 2010, Hansen"
la var govmhansen "Abs(GOM - Hansen, proportion)"
la var govmhansenb "Abs(GOM - Hansen, binary)"
gen difscenes = abs(num_scenesL52011-num_scenesL72010)
la var difscenes  "Abs(L5 scenes 2011 - L7 scenes 2010)"
gen slope_scenesL7 = slope_mean * num_scenesL72010
la var slope_scenesL7 "Mean slope x L7 scenes 2010"
gen slope_scenesL72011 = slope_mean * num_scenesL72011
la var slope_scenesL7 "Mean slope x L7 scenes 2011"
gen slope_dif = difscenes * slope_mean
la var slope_dif "Mean slope x difference in scenes"
gen minscenes = min(num_scenesL72010,num_scenesL52011)
la var minscenes "Minimum of Landsat scenes, 2010-2011"
gen slopescenes = minscenes * slope_mean
la var slopescenes "Minimum of Landsat scenes x slope mean"


gen mtropical = biome == 1
la var mtropical "Moist tropical biome"
gen dtropical = biome == 2
la var dtropical "Dry tropical biome"
gen pine = biome == 3 | biome == 5
la var pine "Pine/oak biome"
gen dry = biome == 0 | biome == 7 | biome == 13
la var dry "Dry woodlands or grasslands"
gen mangrove = biome == 14
la var mangrove "Mangrove biome"

save "${data}defor_frontier_panel_wLS.dta", replace


************************* TIDYING UP TINY POLYS DATASET FROM I2I PROJECT TO MAKE PANEL THIS RAW DATA IS NOT PUBLICLY AVAILABLE *********************
use "${data}PESi2iPanelDataLongv6_nooutsides.dta", clear


*******GENERATE A FEW USEFUL VARIABLES ***************
set more off
sort APEJNCNID

by APEJNCNID: egen ever_hidro = max(mod_hidro)
bysort APEJNCNID: egen tempmaxe = max(elaboration)

la var ever_hidro "Ever applied for PSAH"


gen loc_wt = round(TPApAreaHa)
la var loc_wt "Polygon area, rounded"
la var ent_cve "State code"
la var yr "Year"
drop binary_defor
gen binary_defor = defor_50per > 0
la var binary_defor "Conditional deforestation $>$ 0"

* Generate the lag of beneficiary 
 sort APEJNCNID yr
 by APEJNCNID: gen beneL1 = beneficiary[_n-1] if yr==yr[_n-1]+1
 replace beneL1 = 0 if yr == 2001
 la var beneL1 "Beneficiary 0/1, time-varying"
 
* Generate variable for percent coverage at baseline:
gen pctTC2000 = (tc2000_per50/TPApAreaHa)*100
la var pctTC2000 "Percent forest cover, 2000"

************ Allocating municipality code to private properties **********************
gen EjidoCode = EjNClNmID
replace EjidoCode = muni_cve*100 if EjidoCode == 0
la var EjidoCode "Variable for ejido FEs (municipality for private properties)"


* defining the ecoregions **

gen moistforest = per_ecoid_60162 +  per_ecoid_60114 + per_ecoid_60181 + per_ecoid_60176 + per_ecoid_60177 + per_ecoid_60161 + ///
per_ecoid_60154 + per_ecoid_60113 + per_ecoid_60148 + per_ecoid_60146 + per_ecoid_60112
la var moistforest "Proportion moist forest"

gen dryforest =  per_ecoid_60205 +  per_ecoid_60217 + per_ecoid_60204 + per_ecoid_60230 + per_ecoid_60211 + per_ecoid_60209 + per_ecoid_60235 + ///
per_ecoid_50201 + per_ecoid_60233 + per_ecoid_60227 + per_ecoid_60228 
la var dryforest "Proportion dry forest"

gen pineoak = per_ecoid_60308 +  per_ecoid_50302 + per_ecoid_50303 + per_ecoid_60307 + per_ecoid_60310 + per_ecoid_60309 + per_ecoid_60303 + per_ecoid_50526
la var pineoak "Proportion pine-oak forest"

gen grasslands = per_ecoid_50701
la var grasslands "Proportion grasslands"

gen scrub = per_ecoid_51201 +  per_ecoid_61314 + per_ecoid_51307 + per_ecoid_51301 + per_ecoid_61316 + per_ecoid_51306 + per_ecoid_51312 + per_ecoid_51302 ///
 + per_ecoid_51303 + per_ecoid_51311 + per_ecoid_51310
 la var scrub "Proportion scrub/matorral"

gen mangroves = per_ecoid_61407 +  per_ecoid_61404 + per_ecoid_61403
la var mangroves "Proportion mangroves"



********* HERE IS OUR SAMPLE: POLYGONS THAT (1) APPLIED FOR THE HYDROLOGICAL SERVICES PROGRAM,
********* (2) WERE BETWEEN 10 AND 2000 HA, (3) WITH BASELINE FOREST GREATER THAN 10 HA

keep if per_defor ~=.  // ONLY KEEP POLYGONS WITH DEFORESTATION MEASUREMENT
keep if ever_hidro==1 // ONLY KEEP HYDROLOGICAL SERVICES POLYGONS
drop if tc2000_per50 < 0.10 // KEEP IF FOREST AREA > 10 HA -- WE COULD ADJUST THIS UPWARDS...
* DROP VERY LARGE OR VERY SMALL 
drop if TPApAreaHa < 10
drop if TPApAreaHa == .
replace TPApAreaHa = 6000 if TPApAreaHa > 6000 & (cohort2010 == 1| cohort2011 == 1 | cohort2012 == 1) 
replace TPApAreaHa = 4000 if TPApAreaHa > 4000 & (cohort2003 ==1 | cohort2004 == 1 | cohort2005 == 1) 
replace TPApAreaHa = 3000 if TPApAreaHa > 3000 & (cohort2006 == 1 | cohort2007 == 1 | cohort2008 == 1 | cohort2009 == 1) 
replace TPApAreaHa = 3000 if TPApAreaHa > 3000 & (cohort2013 == 1| cohort2014 == 1) 
replace TPApAreaHa = 2000 if TPApAreaHa > 2000 & (cohort2015 == 1) 

bysort APEJNCNID: egen everbene = max(beneficiary)
 la var everbene "Ever received PES"
 
bysort APEJNCNID: egen evernonfactible = sum(nonfactible)
drop if evernonfactible > 1 & everbene == 0
drop if tempmaxe == 1 & everbene == 0
	

* Should we fix the missing slope observations?  Let's replace with the mean for the ejido or municipality for the moment: 

bysort EjidoCode: egen meanslope = mean(slope_deg_mean)
replace slope_deg_mean = meanslope if slope_deg_mean == .

gen inejido = EjNClNmID ~= 0
la var inejido "In an ejido"
keep APEJNCNID i2itpapid EjidoCode pctTC2000  dist_any_road_mt dist_5000_city_km ln_dist_maj_city elev_met_mean slope_deg_mean per_maj_ind per_defor binary_defor ent_cve muni_cve ///
 applied_in_yr threshold  yr beneL1 TPApAreaHa EjNClNmID moistforest dryforest pineoak scrub mangroves everbene inejido
 
 
merge 1:1 APEJNCNID yr using "${data}TinyPolysShapes\Landsat\tinypolyscenes.dta"
 
 la var path_row_counts_2000_2015_L5 "Cloud-free Landsat 5"
  la var path_row_counts_2000_2015_L7 "Cloud-free Landsat 7"
  drop if _merge == 2


save "${data}PESi2iPanelDataShortAugust2021.dta", replace


   
 

